import json
import os
import random

from modelscope.msdatasets import MsDataset

# Seed the global RNG so the shuffle below (and therefore the train/val
# membership) is identical on every run.
random.seed(42)

# Directory that receives the two JSONL files written at the end of the script.
OUTPUT_DIR = 'Data'
# Fraction of the shuffled records that goes to the training file; the
# remainder goes to the validation file.
TRAIN_FRACTION = 0.85


def _write_jsonl(path, records):
    """Write *records* to *path* in JSON Lines format.

    Each record is serialized with ``json.dump`` on its own line. Non-ASCII
    characters are written as-is (UTF-8) instead of ``\\uXXXX`` escapes.
    An existing file at *path* is overwritten.
    """
    with open(path, 'w', encoding='utf-8') as out:
        for record in records:
            json.dump(record, out, ensure_ascii=False)
            out.write('\n')


ds = MsDataset.load('Data/CatalystData', subset_name='default', split='train')

# Materialize the dataset into a list: random.shuffle works in place and
# needs a mutable sequence.
# NOTE(review): assumes every item is JSON-serializable (e.g. a dict of plain
# values) -- confirm against the dataset schema.
data_list = list(ds)
random.shuffle(data_list)

# int() truncates, so any fractional remainder ends up in the validation part.
split_idx = int(len(data_list) * TRAIN_FRACTION)
train_data = data_list[:split_idx]
val_data = data_list[split_idx:]

# open(..., 'w') does not create missing parent directories; make sure the
# output directory exists so a fresh checkout does not fail after the load.
os.makedirs(OUTPUT_DIR, exist_ok=True)

_write_jsonl('Data/train.jsonl', train_data)
_write_jsonl('Data/val.jsonl', val_data)
